import pandas as pd
import os

def merge_csv(root_dir, output_file):
    """Merge the CSV files found under *root_dir* into one CSV file.

    The directory tree is walked recursively. A file is merged only if its
    name ends with ``.csv`` and its header is exactly
    ``path, prompt, transcript`` (in that order); every other file is
    skipped silently. Each merged row receives an ``id`` column holding a
    running counter that starts at 0 and continues across files.

    Directories and files are visited in sorted name order so the ``id``
    assigned to a given row is reproducible between runs.

    Args:
        root_dir: Directory to search recursively for CSV files.
        output_file: Path of the merged CSV to write (without the index).
            If no file qualifies, a header-only CSV is written.
    """
    expected_columns = ['path', 'prompt', 'transcript']
    frames = []
    id_counter = 0

    for subdir, dirs, files in os.walk(root_dir):
        # Sorting ``dirs`` in place controls the order in which os.walk
        # descends, which keeps the id assignment deterministic.
        dirs.sort()
        for file in sorted(files):
            if not file.endswith('.csv'):
                continue

            file_path = os.path.join(subdir, file)
            try:
                temp_df = pd.read_csv(file_path)
            except pd.errors.EmptyDataError:
                # A file with no header cannot match the expected columns,
                # so treat it like any other non-matching file.
                continue

            # Only accept files whose header matches the expected layout.
            if list(temp_df.columns) != expected_columns:
                continue
            # Header-only files contribute no rows; skipping them keeps
            # empty frames out of the final concatenation.
            if temp_df.empty:
                continue

            temp_df['id'] = range(id_counter, id_counter + len(temp_df))
            id_counter += len(temp_df)
            frames.append(temp_df)

    if frames:
        # Concatenate once instead of re-copying a growing frame per file.
        merged_data = pd.concat(frames, ignore_index=True)
    else:
        merged_data = pd.DataFrame(columns=expected_columns + ['id'])

    merged_data.to_csv(output_file, index=False)

# Example invocation: merge every matching CSV below the source directory.
root_directory = '/root/autodl-tmp/TEDspliter/increment_TED/train/stm'  # replace with your own directory path
output_csv = '/root/autodl-tmp/TEDspliter/increment_TED/merged.csv'  # path of the merged output file
merge_csv(root_dir=root_directory, output_file=output_csv)
